{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 80
    },
    "colab_type": "code",
    "id": "w3q8-2lkmd1p",
    "outputId": "ecbc04e0-c368-4db6-c70b-3e6bc4f19253"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2.0.0\n"
     ]
    }
   ],
   "source": [
    "import tensorflow as tf\n",
    "print(tf.__version__) # Should be >= 2.0.0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 232
    },
    "colab_type": "code",
    "id": "xwVSKjMSmd1t",
    "outputId": "da88f82e-f012-445d-8dba-971c9711f06d"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "75.0\n"
     ]
    }
   ],
   "source": [
    "# 1.0 - Introduction\n",
    "x = tf.constant(5.0)\n",
    "with tf.GradientTape() as tape:\n",
    "    tape.watch(x)\n",
    "    y = x**3\n",
    "\n",
    "print(tape.gradient(y, x).numpy())\n",
    "\n",
    "del tape, x"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "P3SCdRRrmd1w"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "108.0\n"
     ]
    }
   ],
   "source": [
    "# 1.1 - Automatically Watching Variables\n",
    "x = tf.Variable(6.0, trainable=True) # Error on: tf.constant(6.0) or tf.Variable(6.0, trainable=False)\n",
    "with tf.GradientTape() as tape:\n",
    "    y = x**3\n",
    "\n",
    "print(tape.gradient(y, x).numpy())\n",
    "\n",
    "del tape, x"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "HsrMx2hTmd1z"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "None\n"
     ]
    }
   ],
   "source": [
    "# 1.2 - watch_accessed_variables=False\n",
    "x = tf.Variable(3.0, trainable=True)\n",
    "with tf.GradientTape(watch_accessed_variables=False) as tape:\n",
    "    y = x**3\n",
    "\n",
    "print(tape.gradient(y, x))\n",
    "\n",
    "del tape, x"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "Ja7wQ_fAmd11"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "18.0\n"
     ]
    }
   ],
   "source": [
    "# 1.3 - Higher-Order Derivatives\n",
    "x = tf.Variable(3.0, trainable=True)\n",
    "with tf.GradientTape() as tape1:\n",
    "    with tf.GradientTape() as tape2:\n",
    "        y = x ** 3\n",
    "    order_1 = tape2.gradient(y, x)\n",
    "order_2 = tape1.gradient(order_1, x)\n",
    "\n",
    "print(order_2.numpy())\n",
    "\n",
    "del x, tape1, tape2, order_1, order_2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "-RSXM_KTmd14"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "12.0\n",
      "12.0\n"
     ]
    }
   ],
   "source": [
    "# 1.4 - persistent=True\n",
    "a = tf.Variable(6.0, trainable=True)\n",
    "b = tf.Variable(2.0, trainable=True)\n",
    "with tf.GradientTape(persistent=True) as tape:\n",
    "    y1 = a ** 2\n",
    "    y2 = b ** 3\n",
    "                                                                                                                                                                                                                                                                                                                                                \n",
    "print(tape.gradient(y1, a).numpy())\n",
    "print(tape.gradient(y2, b).numpy())\n",
    "\n",
    "del a, b, tape, y1, y2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "1-Z8P3pHmd17"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "27.0\n"
     ]
    }
   ],
   "source": [
    "# 1.5 - stop_recording()\n",
    "x = tf.Variable(3.0, trainable=True)\n",
    "with tf.GradientTape() as tape:\n",
    "    y = x**3\n",
    "    with tape.stop_recording():\n",
    "        # Putting tape.gradient outside of a stop_recording block, but within the tape block, will lead to a warning about inefficency\n",
    "        print(tape.gradient(y, x).numpy())\n",
    "\n",
    "del tape, x"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "EK-YuK4rmd19"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "12.0\n",
      "12.0\n"
     ]
    }
   ],
   "source": [
    "# 1.5.1\n",
    "# I prefer the following to what we did in part 1.4\n",
    "a = tf.Variable(6.0, trainable=True)\n",
    "b = tf.Variable(2.0, trainable=True)\n",
    "with tf.GradientTape(persistent=True) as tape:\n",
    "    y1 = a ** 2\n",
    "    with tape.stop_recording():\n",
    "        print(tape.gradient(y1, a).numpy())\n",
    "    \n",
    "    y2 = b ** 3\n",
    "    with tape.stop_recording():\n",
    "        print(tape.gradient(y2, b).numpy())\n",
    "                                                                                                                                                                                                                                                                                                                                                \n",
    "\n",
    "del a, b, tape, y1, y2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "GCFPz_QXmd1_"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "y ≈ 10.054533958435059x + 5.001296043395996\n"
     ]
    }
   ],
   "source": [
    "# 2.0 - Linear Regression\n",
    "import random\n",
    "import numpy as np\n",
    "\n",
    "# Loss function\n",
    "def loss(real_y, pred_y):\n",
    "    return tf.abs(real_y - pred_y)\n",
    "\n",
    "# Training data\n",
    "x_train = np.asarray([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])\n",
    "y_train = np.asarray([i*10+5 for i in x_train]) # y = 10x+5\n",
    "\n",
    "# Trainable variables\n",
    "a = tf.Variable(random.random(), trainable=True)\n",
    "b = tf.Variable(random.random(), trainable=True)\n",
    "\n",
    "# Step function\n",
    "def step(real_x, real_y):\n",
    "    with tf.GradientTape(persistent=True) as tape:\n",
    "        # Make prediction\n",
    "        pred_y = a * real_x + b\n",
    "        # Calculate loss\n",
    "        reg_loss = loss(real_y, pred_y)\n",
    "    \n",
    "    # Calculate gradients\n",
    "    a_gradients, b_gradients = tape.gradient(reg_loss, (a, b))\n",
    "\n",
    "    # Update variables\n",
    "    a.assign_sub(a_gradients * 0.001)\n",
    "    b.assign_sub(b_gradients * 0.001)\n",
    "\n",
    "# Training loop\n",
    "for _ in range(10000):\n",
    "    step(x_train, y_train)\n",
    "\n",
    "print(f'y ≈ {a.numpy()}x + {b.numpy()}')\n",
    "\n",
    "del a, b, x_train, y_train, step, loss"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "GoZgC8gPmd2B"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "y ≈ 6.002356052398682x^2 + 7.548577308654785x + 1.9996548891067505\n"
     ]
    }
   ],
   "source": [
    "# 2.1 - Polynomial Regression\n",
    "import random\n",
    "import numpy as np\n",
    "\n",
    "# Loss function\n",
    "def loss(real_y, pred_y):\n",
    "    return tf.abs(real_y - pred_y)\n",
    "\n",
    "# Training data\n",
    "x_train = np.asarray([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])\n",
    "y_train = np.asarray([6*i**2 + 8*i + 2 for i in x_train]) # y = 6x^2 + 8x + 2\n",
    "\n",
    "# Trainable variables\n",
    "a = tf.Variable(random.random(), trainable=True)\n",
    "b = tf.Variable(random.random(), trainable=True)\n",
    "c = tf.Variable(random.random(), trainable=True)\n",
    "\n",
    "# Step function\n",
    "def step(real_x, real_y):\n",
    "    with tf.GradientTape(persistent=True) as tape:\n",
    "        # Make prediction\n",
    "        pred_y = a*real_x**2 + b*real_x + c\n",
    "        # Calculate loss\n",
    "        poly_loss = loss(real_y, pred_y)\n",
    "    \n",
    "    # Calculate gradients\n",
    "    a_gradients, b_gradients, c_gradients = tape.gradient(poly_loss, (a, b, c))\n",
    "\n",
    "    # Update variables\n",
    "    a.assign_sub(a_gradients * 0.001)\n",
    "    b.assign_sub(b_gradients * 0.001)\n",
    "    c.assign_sub(c_gradients * 0.001)\n",
    "\n",
    "# Training loop\n",
    "for _ in range(10000):\n",
    "    step(x_train, y_train)\n",
    "\n",
    "print(f'y ≈ {a.numpy()}x^2 + {b.numpy()}x + {c.numpy()}')\n",
    "\n",
    "del a, b, x_train, y_train, step, loss"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "7P2dGYSkmd2E",
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "WARNING: Logging before flag parsing goes to stderr.\n",
      "W1128 08:05:19.897752 140520508213056 base_layer.py:1814] Layer conv2d is casting an input tensor from dtype float64 to the layer's dtype of float32, which is new behavior in TensorFlow 2.  The layer has dtype float32 because it's dtype defaults to floatx.\n",
      "\n",
      "If you intended to run this layer in float32, you can safely ignore this warning. If in doubt, this warning is likely only an issue if you are porting a TensorFlow 1.X model to TensorFlow 2.\n",
      "\n",
      "To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.\n",
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "=========================\n",
      "Accuracy: 0.9899\n"
     ]
    }
   ],
   "source": [
    "# 2.2 - Classifying MNIST\n",
    "from tensorflow.keras.layers import Conv2D, Flatten, Dense, Dropout, MaxPooling2D\n",
    "from tensorflow.keras.models import Sequential\n",
    "from tensorflow.keras.initializers import RandomNormal\n",
    "from tensorflow.keras.datasets import mnist\n",
    "from tensorflow.keras.optimizers import Adam\n",
    "import matplotlib.pyplot as plt\n",
    "import random\n",
    "import math\n",
    "%matplotlib inline\n",
    "\n",
    "# Load and pre-process training data\n",
    "(x_train, y_train), (x_test, y_test) = mnist.load_data()\n",
    "x_train = (x_train / 255).reshape((-1, 28, 28, 1))\n",
    "y_train = tf.keras.utils.to_categorical(y_train, 10)\n",
    "x_test = (x_test / 255).reshape((-1, 28, 28, 1))\n",
    "y_test = tf.keras.utils.to_categorical(y_test, 10)\n",
    "\n",
    "# Hyperparameters\n",
    "batch_size = 128\n",
    "epochs = 25\n",
    "optimizer = Adam(lr=0.001)\n",
    "weight_init = RandomNormal()\n",
    "\n",
    "# Build model\n",
    "model = Sequential()\n",
    "model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', kernel_initializer=weight_init, input_shape=(28, 28, 1)))\n",
    "model.add(Conv2D(64, (3, 3), activation='relu', kernel_initializer=weight_init))\n",
    "model.add(MaxPooling2D(pool_size=(2, 2)))\n",
    "model.add(Dropout(0.25))\n",
    "model.add(Flatten())\n",
    "model.add(Dense(128, activation='relu', kernel_initializer=weight_init))\n",
    "model.add(Dropout(0.5))\n",
    "model.add(Dense(10, activation='softmax', kernel_initializer=weight_init))\n",
    "\n",
    "# Step function\n",
    "def step(real_x, real_y):\n",
    "    with tf.GradientTape() as tape:\n",
    "        # Make prediction\n",
    "        pred_y = model(real_x.reshape((-1, 28, 28, 1)))\n",
    "        # Calculate loss\n",
    "        model_loss = tf.keras.losses.categorical_crossentropy(real_y, pred_y)\n",
    "    \n",
    "    # Calculate gradients\n",
    "    model_gradients = tape.gradient(model_loss, model.trainable_variables)\n",
    "    # Update model\n",
    "    optimizer.apply_gradients(zip(model_gradients, model.trainable_variables))\n",
    "\n",
    "# Training loop\n",
    "bat_per_epoch = math.floor(len(x_train) / batch_size)\n",
    "for epoch in range(epochs):\n",
    "    print('=', end='')\n",
    "    for i in range(bat_per_epoch):\n",
    "        n = i*batch_size\n",
    "        step(x_train[n:n+batch_size], y_train[n:n+batch_size])\n",
    "\n",
    "# Calculate accuracy\n",
    "model.compile(optimizer=optimizer, loss=tf.losses.categorical_crossentropy, metrics=['acc']) # Compile just for evaluation\n",
    "print('\\nAccuracy:', model.evaluate(x_test, y_test, verbose=0)[1])"
   ]
  }
 ],
 "metadata": {
  "accelerator": "GPU",
  "colab": {
   "collapsed_sections": [],
   "machine_shape": "hm",
   "name": "GradientTape-explained.ipynb",
   "provenance": []
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
